/**********************************************************************/
/*
      Title: baseline_susenas_clean.do
			Author: Robbie Dulin, Clotaire Boyer
			Created: 20 Jan 2022
    	Description: Prepares the 2016, 2017, and 2018 SUSENAS data for
				LASSO selection.
*/
/**********************************************************************/

/*----------------------------------------------------*/
                 /* Section: Setup */
/*----------------------------------------------------*/

// start from an empty dataset and switch off --more-- pauses
clear
set more off


// close any log that is still open (cap: ignore the error if none is)
cap log close
// today's date formatted with %tdCYND, used as the log-file name prefix
local prefix: display %tdCYND td(`c(current_date)')
// assumes the global $log is defined by the calling environment
log using "$log/`prefix'_baseline_susenas_clean", replace text

/*----------------------------------------------------*/
    /* Section: Create Mar 2016 Baseline Variables*/
/*----------------------------------------------------*/

// load Mar 2016 household dataset
use "$importable/susenas16mar_kr.dta", clear

** House variables

// r1501 and r1503 are kept as-is under descriptive names
tab r1501
rename r1501 n_hh_house

summ r1503
rename r1503 area

// r1502: one 0/1 indicator per code 1-5, in code order
tab r1502
local code = 0
foreach tenure in ownhouse lease freelease companyhouse otherhouse {
	local ++code
	gen `tenure' = r1502 == `code'
}
summ ownhouse lease freelease companyhouse otherhouse

// r1504: one 0/1 indicator per roof code 1-10, in code order.
// Every indicator, including otherroof (code 10), is built from r1504;
// the previous version tested the wall variable r1505 for otherroof.
tab r1504
local code = 0
foreach roof in concrete tile metal clay asbestos ironsheet bamboo shinglewood fiberpalm other {
	local ++code
	gen `roof'roof = r1504 == `code'
}
summ *roof

// r1505: one 0/1 indicator per wall code 1-7
tab r1505
local code = 0
foreach wall in concrete wovenbamboo wood wovenwood logwood bamboo other {
	local ++code
	gen `wall'wall = r1505 == `code'
}
summ *wall

// r1506: one 0/1 indicator per floor code 1-10
tab r1506
local code = 0
foreach floor in marblegranite ceramic parquettevinyl tile highqualitywood cementbrick bamboo lowqualitywood soil other {
	local ++code
	gen `floor'floor = r1506 == `code'
}
summ *floor

** Toilet variables
tab r1507a

// r1507a: one 0/1 indicator per code 1-5
local code = 0
foreach access in personal mutual communal notuse noaccessto {
	local ++code
	gen `access'toilet = r1507a == `code'
}
summ *toilet

// r1507b: one 0/1 indicator per code 1-4, plus a flag for a missing answer
tab r1507b
local code = 0
foreach kind in goose closedpitt unclosedpitt squat {
	local ++code
	gen `kind'toilet = r1507b == `code'
}
gen notoilet = missing(r1507b)

// interactions: personal / mutual / communal access x toilet kind
tab r1507a r1507b
foreach access in personal mutual communal {
	foreach kind in goose closedpitt unclosedpitt squat {
		gen `access'`kind'toilet = `access'toilet * `kind'toilet
	}
	summ `access'?*toilet
}

** Asset vars

// r1701a-r1701k: indicator equals 1 when the item is coded 1
local asset_names lpg5kgmore refrigerator airconditioner waterheater landline computer goldjewelerymin10gram motorcycle boat motorboat car
local asset_items a b c d e f g h i j k
local n_assets : word count `asset_names'

forvalues i = 1/`n_assets' {
	local item  : word `i' of `asset_items'
	local asset : word `i' of `asset_names'

	tab r1701`item'
	gen `asset' = r1701`item' == 1
	summ `asset'
}

** water variables

// r1507c: codes 1 and 2 are pooled into septicdisposal; codes 3-7 get one indicator each
tab r1507c
gen septicdisposal = inlist(r1507c, 1, 2)
local code = 2
foreach site in pool river pithole open other {
	local ++code
	gen `site'disposal = r1507c == `code'
}
summ *disposal

// r1508a: one 0/1 indicator per drinking-water code 1-12
tab r1508a
local drinkvars
local code = 0
foreach src in bottled refill meteredtap retailtap pump protwell unprotwell protspring unprotspring river rain other {
	local ++code
	gen `src'drinkwater = r1508a == `code'
	local drinkvars `drinkvars' `src'drinkwater
}
summ `drinkvars'

// r1509: one 0/1 indicator per code 1-5
tab r1509
local code = 0
foreach access in personal mutual communal notuse noaccess {
	local ++code
	gen `access'water = r1509 == `code'
}

// r1510: one 0/1 indicator per code 1-3
tab r1510
local code = 0
foreach mode in buying subscribe notbuying {
	local ++code
	gen `mode'drinkwater = r1510 == `code'
}

// r1511a: one 0/1 indicator per cooking-water code 1-12, plus code 96
tab r1511a
local code = 0
foreach src in bottled refill meteredtap retailtap pump protectedwell unprotectedwell protectedspring unprotspring river rain other {
	local ++code
	gen `src'cookwater = r1511a == `code'
}
gen cookwatersource13 = r1511a == 96

// r1512a: one 0/1 indicator per washing-water code 1-12
tab r1512a
local code = 0
foreach src in bottled refill meteredtap retailtap pump protectedwell unprotectedwell protectedspring unprotspring river rain other {
	local ++code
	gen `src'washwater = r1512a == `code'
}
summ *washwater

// r1513: one 0/1 indicator per code 1-2
tab r1513
gen publicpipewater = r1513 == 1
gen publichydrantwater = r1513 == 2
summ public?*water

// Clean-water flags. Each flag is built in three steps:
//   1. copy the follow-up item (r15xxb), so rows missing it start as missing;
//   2. set the flag to 0 wherever the follow-up item is non-missing;
//   3. set the flag to 1 for the listed source codes of r15xxa.
// NOTE(review): the answer to the follow-up item itself is discarded in step 2,
// and the "clean" list includes codes 10-12, which the source indicators in this
// file label river / rain / other — confirm this is the intended definition.
tab r1508b
tab r1508a
gen cleandrinkwater = r1508b
replace cleandrinkwater = 0 if r1508b != .
replace cleandrinkwater = 1 if inlist(r1508a, 1, 2, 3, 4, 10, 11, 12)
summ cleandrinkwater

// same construction for cooking water (r1511a / r1511b)
tab r1511b
tab r1511a
gen cleancookwater = r1511b
replace cleancookwater = 0 if r1511b != .
replace cleancookwater = 1 if inlist(r1511a, 1, 2, 3, 4, 10, 11, 12)
summ cleancookwater

// same construction for washing water (r1512a / r1512b)
tab r1512b
tab r1512a
gen cleanwashwater = r1512b
replace cleanwashwater = 0 if r1512b != .
replace cleanwashwater = 1 if inlist(r1512a, 1, 2, 3, 4, 10, 11, 12)
summ cleanwashwater

// rename exp_cap to the name used in every survey year of this script
rename exp_cap month_pcexp

** Demographic variables
// merge in individual data; every household must match at least one person
merge 1:m urut using "$importable/susenas16mar_ki.dta"
assert _m == 3
drop _m

// household head-counts: members with r405 == 1 and r405 == 2
tab r405
bysort urut: egen totalmale = total(r405 == 1)
bysort urut: egen totalfemale = total(r405 == 2)

// household count of members with r403 == 3
tab r403
bysort urut: egen totalchild = total(r403 == 3)

** Telecommunication
// For each person-level item coded 1, build two household-level variables:
//   n_*_hh : number of household members with the item coded 1
//   flag   : 1 if at least one member has it, else 0
// The totals are taken directly over the comparison, so no person-level helper
// variable is left in the data. The previous version never dropped
// useinternet_ind, which kept one arbitrary member's answer per household after
// `duplicates drop' and carried it into the district means.

tab r1004a
bysort urut: egen n_cell_hh = total(r1004a == 1)
gen own_cell = n_cell_hh > 0
summ n_cell_hh own_cell

tab r1005
bysort urut: egen n_computer_hh = total(r1005 == 1)
gen usecomputer = n_computer_hh > 0
summ n_computer_hh usecomputer

tab r1006
bysort urut: egen n_internet_hh = total(r1006 == 1)
gen useinternet = n_internet_hh > 0
summ useinternet n_internet_hh

// r1008a-r1008f: one count/flag pair per item
local places home notownhome office school public vehicle
local items  a b c d e f
local n_places : word count `places'

forvalues i = 1/`n_places' {
	local item  : word `i' of `items'
	local place : word `i' of `places'

	tab r1008`item'
	bysort urut: egen n_`place'internet_hh = total(r1008`item' == 1)
	gen `place'internet = n_`place'internet_hh > 0
	summ `place'internet n_`place'internet_hh
}

// r1009a-r1009h: household flag int_* = 1 if any member has the item coded 1.
// The person-level helper and the household count are dropped after use.
local purposes gettinginformation dohomework email socialmedia buysellservice recreational financial other
local items    a b c d e f g h
local n_purposes : word count `purposes'

forvalues i = 1/`n_purposes' {
	local item    : word `i' of `items'
	local purpose : word `i' of `purposes'

	tab r1009`item'
	gen int_`purpose'_ind = r1009`item' == 1
	bysort urut: egen n_int_`purpose' = total(int_`purpose'_ind)
	gen int_`purpose' = n_int_`purpose' > 0
	summ int_`purpose' n_int_`purpose'
	drop int_`purpose'_ind n_int_`purpose'
}

** Energy
// r1514: one 0/1 indicator per code 1-3
tab r1514
local code = 0
foreach src in PLN nonPLN non {
	local ++code
	gen `src'electricity = r1514 == `code'
}
summ *electricity

// r1515: one 0/1 indicator per code 1-10, plus code 0
tab r1515
local code = 0
foreach fuel in electricity lpgas5kg lpgas12kg lpgas3kg gasorbiogas kerosene briquettecoal charcoal firewood otherenergy {
	local ++code
	gen `fuel' = r1515 == `code'
}
gen notcookingathome = r1515 == 0

// drop duplicates within household
// (the data are at person level after the merge; keep one row per urut)
duplicates drop urut, force

gen urban = r105 == 1
rename r101 PROVINSI
rename r102 KABU
// drop every variable whose name starts with r1..r9 or ends in lai / khir,
// plus the listed expenditure and weight variables
drop r1* r2* r3* r4* r5* r6* r7* r8* r9* *lai *khir catatan food nfood expend fwt_tahun

// NOTE(review): relies on a lower-case `kabu' variable already present in the
// loaded data; it is not created in this section — confirm.
order PROVINSI kabu urban urut, first
// rename
// prefix every remaining variable with the survey wave
foreach var of var* {
	rename `var' mar16_`var'
}

// strip the prefix again from identifiers and the weight; the upper-case KABU
// (renamed from r102) is dropped in favour of kabu
rename *PROVINSI PROVINSI
rename *kabu kabu
rename *urban urban
rename *urut urut
drop *KABU
rename *fwt fwt

// rename to match merge
rename kabu id_jan14

// hold the household-level 2016 file for the merge-and-collapse section
tempfile sus16
save `sus16', replace



/*----------------------------------------------------*/
   /* Section: Create Mar 2017 Baseline Variables */
/*----------------------------------------------------*/

// load Mar 2017 household dataset
use "$importable/susenas17mar_kor17rt_diseminasi.dta", clear

** House variables
// r1601a and r1604 are kept as-is under descriptive names
rename r1601a n_hh_house
tab n_hh_house
rename r1604 area
tab area

// r1602: one 0/1 indicator per code 1-5
tab r1602
local code = 0
foreach tenure in ownhouse lease freelease companyhouse otherhouse {
	local ++code
	gen `tenure' = r1602 == `code'
}

// r1607: one 0/1 indicator per roof code 1-8
tab r1607
local code = 0
foreach roof in concrete tile asbestos ironsheet bamboo shinglewood fiberpalm other {
	local ++code
	gen `roof'roof = r1607 == `code'
}

// r1608: one 0/1 indicator per wall code 1-7
tab r1608
local code = 0
foreach wall in concrete wovenbamboo wood wovenwood logwood bamboo other {
	local ++code
	gen `wall'wall = r1608 == `code'
}

// r1609: one 0/1 indicator per floor code 1-9
tab r1609
local code = 0
foreach floor in marblegranite ceramic parquettevinyl tile highqualitywood cementbrick bamboo soil other {
	local ++code
	gen `floor'floor = r1609 == `code'
}

** Toilet variables
// r1610a: one 0/1 indicator per code 1-5
tab r1610a
local code = 0
foreach access in personal mutual communal notuse noaccessto {
	local ++code
	gen `access'toilet = r1610a == `code'
}

// r1610b: one 0/1 indicator per code 1-4 (closedpitttoilet is stored as byte),
// plus a flag for a system-missing answer
tab r1610b
gen goosetoilet = r1610b == 1
gen byte closedpitttoilet = r1610b == 2
gen unclosedpitttoilet = r1610b == 3
gen squattoilet = r1610b == 4
gen notoilet = r1610b == .

// interactions: personal / mutual access x toilet kind
tab r1610a r1610b
foreach access in personal mutual {
	foreach kind in goose closedpitt unclosedpitt squat {
		gen `access'`kind'toilet = `access'toilet * `kind'toilet
	}
}


** Asset vars
// r2201a-r2201k: indicator equals 1 when the item is coded 1
local asset_names lpg5kgmore refrigerator airconditioner waterheater landline computer goldjewelerymin10gram motorcycle boat motorboat car
local asset_items a b c d e f g h i j k
local n_assets : word count `asset_names'

forvalues i = 1/`n_assets' {
	local item  : word `i' of `asset_items'
	local asset : word `i' of `asset_names'
	gen `asset' = r2201`item' == 1
}
summ `asset_names'

** Water vars
// r1610d: one 0/1 indicator per code 1-6
tab r1610d
local code = 0
foreach site in septic pool river pithole open other {
	local ++code
	gen `site'disposal = r1610d == `code'
}

// r1611a: one 0/1 indicator per drinking-water code 1-11
tab r1611a
local code = 0
foreach src in bottled refill tap pump protwell unprotwell protspring unprotspring river rain other {
	local ++code
	gen `src'drinkwater = r1611a == `code'
}

// r1616a: one 0/1 indicator per cooking-water code 1-11
tab r1616a
local code = 0
foreach src in bottled refill tap pump protectedwell unprotectedwell protectedspring unprotectedspring river rain other {
	local ++code
	gen `src'cookwater = r1616a == `code'
}

// Clean-water flags, built in three steps:
//   1. copy the follow-up item (r16xxb), so rows missing it start as missing;
//   2. set every non-missing value to 0;
//   3. set the flag to 1 for the listed source codes of r16xxa.
// NOTE(review): the answer to the follow-up item itself is discarded in step 2,
// and the "clean" list includes codes 9-11, which the 2017 source indicators in
// this file label river / rain / other — confirm this is the intended definition.
tab r1611a
tab r1611b
gen cleandrinkwater = r1611b
replace cleandrinkwater = 0 if cleandrinkwater != .
replace cleandrinkwater = 1 if inlist(r1611a, 1, 2, 3, 9, 10, 11)

// same construction for cooking water (r1616a / r1616b)
tab r1616b
tab r1616a
gen cleancookwater = r1616b
replace cleancookwater = 0  if cleancookwater != .
replace cleancookwater = 1 if inlist(r1616a, 1, 2, 3, 9, 10, 11)


** Energy vars
// r1618a: codes 1 and 2 are pooled into PLNelectricity
// NOTE(review): nonelectricity tests code 5 here, while the 2018 section of this
// file tests code 4 on the matching item — check against `tab r1618a'.
tab r1618a
gen PLNelectricity = inlist(r1618a, 1, 2)
gen nonPLNelectricity = r1618a == 3
gen nonelectricity = r1618a == 5

// r1619: codes 5 and 6 are pooled into gasorbiogas; code 0 flags notcookingathome
tab r1619
local code = 0
foreach fuel in electricity lpgas5kg lpgas12kg lpgas3kg {
	local ++code
	gen `fuel' = r1619 == `code'
}
gen gasorbiogas = inlist(r1619, 5, 6)
local code = 6
foreach fuel in kerosene briquettecoal charcoal firewood otherenergy {
	local ++code
	gen `fuel' = r1619 == `code'
}
gen notcookingathome = r1619 == 0

*** Variables from individual survey
// every household must match at least one person
merge 1:m renum using "$importable/susenas17mar_kor17ind_a_diseminasi.dta"
assert _m == 3
drop _m

** Demographic variables
// household head-counts: members with r405 == 1, r405 == 2 and r403 == 3
bysort renum: egen totalmale = total(r405 == 1)
bysort renum: egen totalfemale = total(r405 == 2)
bysort renum: egen totalchild = total(r403 == 3)

summ totalmale totalfemale totalchild

** Telecommunication
// For each person-level item, build two household-level variables:
//   n_*_hh : number of household members for whom the item holds
//   flag   : 1 if it holds for at least one member, else 0
// The totals are taken directly over the condition, so no person-level helper
// variable is left in the data. The previous version never dropped `cell' and
// `computeruse', which kept one arbitrary member's answer per household after
// `duplicates drop' and carried it into the district means.

// r702 - r704: the item holds when coded 1
bysort renum: egen n_cell_hh = total(r702 == 1)
gen own_cell = n_cell_hh > 0
summ n_cell_hh own_cell

bysort renum: egen n_computer_hh = total(r703 == 1)
gen usecomputer = n_computer_hh > 0
summ n_computer_hh usecomputer

bysort renum: egen n_internet_hh = total(r704 == 1)
gen useinternet = n_internet_hh > 0
summ useinternet n_internet_hh

// r706_*: the item holds when the variable is non-missing.
// Items a-e and g are used; r706_f is not.
local places home notownhome office school public vehicle
local items  a b c d e g
local n_places : word count `places'

forvalues i = 1/`n_places' {
	local item  : word `i' of `items'
	local place : word `i' of `places'

	bysort renum: egen n_`place'internet_hh = total(!missing(r706_`item'))
	gen `place'internet = n_`place'internet_hh > 0
	summ `place'internet n_`place'internet_hh
}

// r707_*: household flag int_* = 1 if the item is non-missing for any member.
// The household count is a temporary helper and is dropped in every case. The
// previous version left int_email_ind and int_socialmedia_ind in the data, so
// one arbitrary member's answer per household reached the district means.
local purposes gettinginformation dohomework email socialmedia
local items    a b c d
forvalues i = 1/4 {
	local item    : word `i' of `items'
	local purpose : word `i' of `purposes'

	bysort renum: egen n_int_`purpose' = total(!missing(r707_`item'))
	gen int_`purpose' = n_int_`purpose' > 0
	summ int_`purpose'
	drop n_int_`purpose'
}

// buying and selling are separate items (r707_e, r707_f), pooled into one flag
bysort renum: egen n_int_buysellservice = total(!missing(r707_e) | !missing(r707_f))
gen int_buysellservice = n_int_buysellservice > 0
summ int_buysellservice
drop n_int_buysellservice

// items g, h and j are used; r707_i is not
local purposes recreational financial other
local items    g h j
forvalues i = 1/3 {
	local item    : word `i' of `items'
	local purpose : word `i' of `purposes'

	bysort renum: egen n_int_`purpose' = total(!missing(r707_`item'))
	gen int_`purpose' = n_int_`purpose' > 0
	summ int_`purpose'
	drop n_int_`purpose'
}


// the data are at person level after the merge; keep one row per household (renum)
duplicates drop renum, force

** monthly per capita expenditure
rename exp_cap month_pcexp
summ month_pcexp
// tostring PROVINSI, replace
// tostring KABU, replace

rename r101 PROVINSI
rename r102 KABU
gen urban = r105 == 1

// Create kabu code
// kabu = province code followed by the district code, with single-digit
// district codes zero-padded to two digits; built as a string, then made numeric
tostring PROVINSI, replace
tostring KABU, replace
gen zero = "0"
replace KABU = zero + KABU if inlist(KABU, "1", "2", "3", "4", "5", "6", "7", "8", "9")
gen kabu = PROVINSI + KABU
drop KABU
drop zero
destring kabu, replace


// drop raw survey variables by name pattern
// NOTE(review): patterns such as r*1* also match any derived variable that
// starts with r and contains that digit — none is created in this section, but
// keep this in mind when adding variables.
drop r1* r2* r3* r4* r5* r6* r7* r8* r9* r*1* r*2* r*3* r*4* nu* catatan *lai rjam* rmen* *food expend


// prefix every remaining variable with the survey wave ...
foreach var of var* {
	rename `var' mar17_`var'
}
// ... then strip the prefix again from identifiers and the weight
// NOTE(review): assumes `urut' and `fwt' exist in the loaded 2017 data — confirm
rename *PROVINSI PROVINSI
rename *kabu kabu
rename *urut urut
rename *fwt fwt
rename *urban urban


* address split districts
// keep the 2017 code in id_mar17 and map split-off districts back to their
// parent code in id_jan14
gen id_mar17 = kabu
rename kabu id_jan14
replace id_jan14 = 7402 if id_jan14 == 7413 // Muna Barat, split from Muna
replace id_jan14 = 7401 if id_jan14 == 7414 // Buton Tengah, split from Buton
replace id_jan14 = 7401 if id_jan14 == 7415 // Buton Selatan, split from Buton

// hold the household-level 2017 file for the merge-and-collapse section
tempfile sus17
save `sus17'

/*----------------------------------------------------*/
   /* Section: Create Mar 2018 Baseline Variables */
/*----------------------------------------------------*/

// load Mar 2018 household dataset
use "${importable}/kor18rt_diseminasi.dta", clear


*** Household characteristics
// NOTE(review): this section names the variable nhhhouse, whereas the 2016 and
// 2017 sections use n_hh_house — confirm which name downstream code expects.
di _N
summ R1501A
rename R1501A nhhhouse
rename R1504 area

*** Asset variables
// R1801A-R1801K: indicator equals 1 when the item is coded 1
// NOTE(review): `regiferator' is spelt `refrigerator' in the 2016 and 2017
// sections; the name is kept here because it is an output column.
local asset_names lpg5kgmore regiferator airconditioner waterheater landline computer goldjewelerymin10gram motorcycle boat motorboat car
local asset_items A B C D E F G H I J K
local n_assets : word count `asset_names'

forvalues i = 1/`n_assets' {
	local item  : word `i' of `asset_items'
	local asset : word `i' of `asset_names'

	tab R1801`item'
	gen `asset' = R1801`item' == 1
}

*** House characteristics
// R1502: one 0/1 indicator per code 1-5
summ R1502
local code = 0
foreach tenure in ownhouse lease freelease companyhouse otherhouse {
	local ++code
	gen `tenure' = R1502 == `code'
}

// R1507: one 0/1 indicator per roof code 1-8
tab R1507
local code = 0
foreach roof in concrete tile asbestos ironsheet bamboo shinglewood fiberpalm other {
	local ++code
	gen `roof'roof = R1507 == `code'
}

// R1508: one 0/1 indicator per wall code 1-7
tab R1508
local code = 0
foreach wall in concrete wovenbamboo wood wovenwood logwood bamboo other {
	local ++code
	gen `wall'wall = R1508 == `code'
}

// R1509: one 0/1 indicator per floor code 1-9
// NOTE(review): code 5 is named woodfloor here and highqualitywoodfloor in the
// 2017 section — confirm which name downstream code expects.
tab R1509
local code = 0
foreach floor in marblegranite ceramic parquettevinyl tile wood cementbrick bamboo soil other {
	local ++code
	gen `floor'floor = R1509 == `code'
}

*** Toilet variables
// R1510A: one 0/1 indicator per code 1-5
tab R1510A
local code = 0
foreach access in personal mutual communal notuse noaccessto {
	local ++code
	gen `access'toilet = R1510A == `code'
}

tab R1510B
// share of households with R1510B missing, computed from the data instead of a
// hard-coded non-missing count
count if R1510B == .
di r(N) / _N
// R1510B (toilet type) variable is missing 15% of values
// this bc question not asked unless fam has personal or mutual toilet
local code = 0
foreach kind in goose closedpitt unclosedpitt squat {
	local ++code
	gen `kind'toilet = R1510B == `code'
}
gen notoilet = R1510B == .		// R1510B is missing if no toilet

// NOTE: full set of interactions does not seem to make sense here, bc notoilet = (personaltoilet == 0 & mutualtoilet == 0)
// need to think more carefully about this
la var R1510A "Toilet Access"
la var R1510B "Toilet Type"
tab R1510A R1510B

// interactions: personal / mutual access x toilet kind
foreach access in personal mutual {
	foreach kind in goose closedpitt unclosedpitt squat {
		gen `access'`kind'toilet = `access'toilet * `kind'toilet
	}
	summ `access'goosetoilet `access'closedpitttoilet `access'unclosedpitttoilet `access'squattoilet
}

tab R1510D
di _N
// households with a personal or mutual toilet AND a recorded disposal site.
// The parentheses are required: & binds tighter than |, so without them the
// count included every personal-toilet household regardless of R1510D.
count if (R1510A == 1 | R1510A == 2) & R1510D != .
// R1510D (final disposal site) is missing if no toilet
local code = 0
foreach site in septic pool river pithole open other {
	local ++code
	gen `site'disposal = R1510D == `code'
}

// R1511A: one 0/1 indicator per drinking-water code 1-11
// NOTE(review): code 7 is named protspringwater here and protspringdrinkwater
// in the 2016 and 2017 sections; the name is kept because it is an output column.
tab R1511A
local code = 0
foreach drinkvar in bottleddrinkwater refilldrinkwater tapdrinkwater pumpdrinkwater protwelldrinkwater unprotwelldrinkwater protspringwater unprotspringdrinkwater riverdrinkwater raindrinkwater otherdrinkwater {
	local ++code
	gen `drinkvar' = R1511A == `code'
}

// summ bottleddrinkwater refilldrinkwater tapdrinkwater pumpdrinkwater protwelldrinkwater unprotwelldrinkwater protspringwater unprotspringdrinkwater riverdrinkwater raindrinkwater otherdrinkwater

// R1516A: one 0/1 indicator per cooking-water code 1-11
tab R1516A
local code = 0
foreach src in bottled refill tap pump protectedwell unprotectedwell protectedspring unprotectedspring river rain other {
	local ++code
	gen `src'cookwater = R1516A == `code'
}

// R1518A: codes 1 and 2 are pooled into PLNelectricity
tab R1518A
gen PLNelectricity = inlist(R1518A, 1, 2)
gen nonPLNelectricity = R1518A == 3
gen nonelectricity = R1518A == 4

// R1519: codes 5 and 6 are pooled into gasorbiogas; code 0 flags notcookingathome
tab R1519
local code = 0
foreach fuel in electricity lpgas5kg lpgas12kg lpgas3kg {
	local ++code
	gen `fuel' = R1519 == `code'
}
gen gasorbiogas = inlist(R1519, 5, 6)
local code = 6
foreach fuel in kerosene briquettecoal charcoal firewood otherenergy {
	local ++code
	gen `fuel' = R1519 == `code'
}
gen notcookingathome = R1519 == 0

// clean drinking water: missing where R1511B is missing, 0 where it is
// non-missing, then 1 for the listed R1511A source codes
// NOTE(review): the list includes codes 9-11 (river / rain / other above) — confirm
gen cleandrinkwater = R1511B
replace cleandrinkwater = 0 if cleandrinkwater != .
replace cleandrinkwater = 1 if inlist(R1511A, 1, 2, 3, 9, 10, 11)

// merge with individual dataset
merge 1:m URUT using "$importable/kor18ind_revisi_diseminasi.dta"
// NOTE(review): the 2016 and 2017 sections `assert _m == 3' after this merge;
// no such check is made here — confirm unmatched records are expected.
drop _m

*** Demographic variables
di _N
summ R407

// household head-counts: members with R405 == 1 and R405 == 2
bysort URUT: egen totalmale = total(R405 == 1)
bysort URUT: egen totalfemale = total(R405 == 2)

// Count members with R403 == 3, the same item the 2016 and 2017 sections use
// (r403 == 3). The previous version tested R407 == 3, although R407 is
// summarised above as a continuous variable.
// NOTE(review): confirm R403 against the 2018 codebook.
bysort URUT: egen totalchild = total(R403 == 3)

// Telecommunication: household count of members with the item coded 1, and a
// 0/1 flag for "at least one member", matching the 2016 and 2017 definitions.
// The previous version left own_cell / usecomputer / useinternet at person
// level, so after `duplicates drop' they held one arbitrary member's answer.
bysort URUT: egen n_cell_hh = total(R714 == 1)
gen own_cell = n_cell_hh > 0

bysort URUT: egen n_computer_hh = total(R715 == 1)
gen usecomputer = n_computer_hh > 0

bysort URUT: egen n_internet_hh = total(R716 == 1)
gen useinternet = n_internet_hh > 0

// keep one row per household
duplicates drop URUT, force

// rename EXP_CAP to the name used in every survey year of this script
rename EXP_CAP month_pcexp

gen urban = R105 == 1
rename R101 PROVINSI
// NOTE(review): uses an existing upper-case KABU as the district code, whereas
// the 2017 section builds kabu from province + district — confirm KABU is
// already the combined code in the 2018 file.
rename KABU kabu
// drop raw survey variables: every name starting with upper-case R, NU or WI,
// plus CATATAN (variable names are case-sensitive, so the lower-case derived
// variables are kept)
drop R* NU* CATATAN WI*

// prefix every remaining variable with the survey wave ...
foreach var of var* {
	rename `var' mar18_`var'
}

// ... then strip the prefix again from identifiers and the weight
rename *PROVINSI PROVINSI
rename *kabu kabu
rename *urban urban
rename *URUT URUT
rename *FWT FWT

* address split districts
// keep the 2018 code in id_mar18 and map split-off districts back to their
// parent code in id_jan14
gen id_mar18 = kabu
rename kabu id_jan14
replace id_jan14 = 7402 if id_jan14 == 7413 // Muna Barat, split from Muna
replace id_jan14 = 7401 if id_jan14 == 7414 // Buton Tengah, split from Buton
replace id_jan14 = 7401 if id_jan14 == 7415 // Buton Selatan, split from Buton

// list the final variable names in the log, then hold the household-level 2018
// file for the merge-and-collapse section
ds
tempfile sus18
save `sus18'


/*----------------------------------------------------*/
            /* Section: Merge and collapse */
/*----------------------------------------------------*/

// Merge in jan 2014 ID from crosswalk and collapse
*get the names from crosswalk file
use "$importable/kabpanel19802014usingUndangUndang.dta", clear
keep id_jan14 kab_jan14 id_dec04 kab_dec04
duplicates drop

// rename id_jan14 kabu_id
forvalues year = 16 / 18 {
	// the 2018 file stores its weight as FWT, the other years as fwt
	local wt = cond(`year' == 18, "FWT", "fwt")

	// work on a copy of the crosswalk so it is intact for the next year
	preserve
	merge 1:m id_jan14 using `sus`year''
	assert _m == 3 // all matched
	drop _m

	// NOTE: need 2004 ids to match with the PPLS Lasso control pool
	// drop kabu_id kab_jan14
	// rename kab_dec04 kabu_name
	// rename id_dec04 kabu_id

	// Collapse following procedure in BDT Dropbox
	// weighted means of every mar<year>* variable by district and urban flag
	collapse (mean) mar`year'* [aw=`wt'], by(id_jan14 urban)

	summ mar`year'*

	save "$cleaned/mar_20`year'_baseline_pool.dta", replace
	restore
}


cap log close
